********************************************************************************

* Filename: Table_6_Student_Knowledge.do

********************************************************************************
*  This do-file is part of the collection of replication files for Schaffner, Glewwe, and Sharma, 
*  "Why Programs Fail:  Lessons for Improving Public Service Quality from Mixed Methods Evaluation
*   of an Unsuccessful Teacher Training Program in Nepal"
*
*This do-file calculates:
*      - all results in Table 6 of the WBER journal article
*	        
* Software version used: STATA/SE 18.0
*
* To learn more about the assessment items, users can study the Item Map spreadsheet that is included
* in the assessments folder (within the Instruments folder) of the replication files.  Specific questions
* (which may appear in one or more assessment versions) are listed in Column G of the item map.  
*  Columns A through F  in the item map indicate the question numbers under which a given
*  question can be found in various assessments (baseline grade 8, baseline grade 9,
*  endline grade 9 version a, and so forth).
* 
********************************************************************************
*GLOBAL FILE PATH DEFINITIONS
*   Paths are built with forward slashes, which Stata accepts on Windows, macOS
*   and Unix alike.  A backslash next to a macro reference is Windows-only and
*   can be read by Stata as an escape character, so it is avoided here.
	global PBRfolder "ADD FILE REFERENCE TO MAIN FOLDER HERE" 
	global datasets "$PBRfolder/Datasets"
	global logs "$PBRfolder/Logs"

********************************************************************************
* SET-UP
	set more off
	clear all
	* Close any log left open by an earlier run before opening this one
	capture log close
	log using "$logs/Table_6_BelowGradeLevel", replace
	* All datasets below are opened by bare file name from this folder
	cd "$datasets"
********************************************************************************
*
****DATA PREPARATION AND CALCULATIONS FOR TABLE 6 ON STUDENT BELOW-GRADE-LEVEL PERFORMANCE
*
****************************************************************************

	* MATH ASSESSMENT ITEMS, GRADE 9

		* Each row is one student observed at endline, and each column is one
		* assessment item (i.e., a question) that appears on any of the exam
		* versions.  An item can appear on more than one exam version.  If the
		* item was on the version the student took, the response is coded as
		* correct or incorrect; otherwise it is missing.  collapse ignores
		* missing values, so the means and counts below cover only students who
		* were given the item.
		use Math09_endline_IRT, clear
		tempfile master pctcorrect numobs
		save "`master'", replace

		* Item numbers present in this file.  Defined once so that the two
		* rename loops below cannot drift apart.
		local items 2/9 10 12 13 15/21 23 25/37 43/48 50/64 67/70 72/74

		* Proportion correct for each item (one row, one column per item)
		collapse (mean) Math_002-Math_074

		* Strip the zero padding (Math_002 -> Correct_2) so reshape can use the
		* numeric suffix as the item number
		foreach num of numlist `items' {
			local padded : display %03.0f `num'
			rename Math_`padded' Correct_`num'
		}
		gen dummy = 1

		* Reshape so that each assessment item is in a different row, and save
		reshape long Correct_ , i(dummy) j(question)
		drop dummy
		sort question
		save "`pctcorrect'", replace

		* Start with master data again and count non-missing responses per item
		use "`master'", clear
		collapse (count) Math_002-Math_074
		foreach num of numlist `items' {
			local padded : display %03.0f `num'
			rename Math_`padded' Count_`num'
		}
		gen dummy = 1

		* Reshape so that each assessment item is in a different row, and save
		reshape long Count_ , i(dummy) j(question)
		drop dummy
		sort question
		save "`numobs'", replace

		* Merge percent correct and number of obs.  Both files come from the
		* same item list, so every item must match; stop if one does not.
		use "`pctcorrect'", clear
		merge 1:1 question using "`numobs'", assert(match) nogenerate

		* Prepare to list the items included in Table 6
		gen Pctcorrect = Correct_*100
		format Pctcorrect %5.1f
		* stable keeps tied items in item-number order, so the listing is the
		* same on every run
		sort Pctcorrect, stable
		gen byte intable = inlist(question, 10, 51, 13, 3, 45, 6, 20)

		* List stats for Table 6, math assessment items, grade 9
		* (columns in the same order as the other three listings in this file)
		list question Count_ Pctcorrect if intable==1
			

	* MATH ASSESSMENT ITEMS, GRADE 10

		* Each row is one student observed at endline, and each column is one
		* assessment item that appears on any of the exam versions.  A response
		* is coded as correct or incorrect if the item was on the version the
		* student took, and is missing otherwise.  collapse ignores missing
		* values, so the means and counts below cover only students who were
		* given the item.
		use Math10_endline_IRT, clear
		tempfile master pctcorrect numobs
		save "`master'", replace

		* Item numbers present in this file.  Defined once so that the two
		* rename loops below cannot drift apart.
		local items 2/9 10/36 43/48 50/64

		* Proportion correct for each item (one row, one column per item)
		collapse (mean) Math_002-Math_064

		* Strip the zero padding (Math_002 -> Correct_2) so reshape can use the
		* numeric suffix as the item number
		foreach num of numlist `items' {
			local padded : display %03.0f `num'
			rename Math_`padded' Correct_`num'
		}
		gen dummy = 1

		* Reshape so that each assessment item is in a different row, and save
		reshape long Correct_ , i(dummy) j(question)
		drop dummy
		sort question
		save "`pctcorrect'", replace

		* Start with master data again and count non-missing responses per item
		use "`master'", clear
		collapse (count) Math_002-Math_064
		foreach num of numlist `items' {
			local padded : display %03.0f `num'
			rename Math_`padded' Count_`num'
		}
		gen dummy = 1

		* Reshape so that each assessment item is in a different row, and save
		reshape long Count_ , i(dummy) j(question)
		drop dummy
		sort question
		save "`numobs'", replace

		* Merge percent correct and number of obs.  Both files come from the
		* same item list, so every item must match; stop if one does not.
		use "`pctcorrect'", clear
		merge 1:1 question using "`numobs'", assert(match) nogenerate

		* Prepare to list the items included in Table 6
		gen Pctcorrect = Correct_*100
		format Pctcorrect %5.1f
		* stable keeps tied items in item-number order; the row swap below is
		* positional, so the sorted order must be the same on every run
		sort Pctcorrect, stable
		gen byte intable = inlist(question, 10, 51, 13, 3, 45, 6, 20)

		* List stats for Table 6, math assessment items, grade 10
		keep if intable==1
		* The swap assumes all seven Table 6 items are present
		assert _N == 7
		* Rows are in ascending Pctcorrect order; reverse the first two
		* (presumably so rows line up with the grade 9 listing -- confirm)
		gen order = _n
		recode order (1=2) (2=1)
		sort order
		list question Count_ Pctcorrect

	* SCIENCE ASSESSMENT ITEMS, GRADE 9

		* Each row is one student observed at endline, and each column is one
		* assessment item that appears on any of the exam versions.  A response
		* is coded as correct or incorrect if the item was on the version the
		* student took, and is missing otherwise.  collapse ignores missing
		* values, so the means and counts below cover only students who were
		* given the item.
		use Sci09_endline_IRT, clear
		tempfile master pctcorrect numobs
		save "`master'", replace

		* Item numbers present in this file.  Defined once so that the two
		* rename loops below cannot drift apart.
		local items 2/9 10/38 41 42 44 48/50 52 54/61 65 67 71/73

		* Proportion correct for each item (one row, one column per item)
		collapse (mean) Sci_002-Sci_073

		* Strip the zero padding (Sci_002 -> Correct_2) so reshape can use the
		* numeric suffix as the item number
		foreach num of numlist `items' {
			local padded : display %03.0f `num'
			rename Sci_`padded' Correct_`num'
		}
		gen dummy = 1

		* Reshape so that each assessment item is in a different row, and save
		reshape long Correct_ , i(dummy) j(question)
		drop dummy
		sort question
		save "`pctcorrect'", replace

		* Start with master data again and count non-missing responses per item
		use "`master'", clear
		collapse (count) Sci_002-Sci_073
		foreach num of numlist `items' {
			local padded : display %03.0f `num'
			rename Sci_`padded' Count_`num'
		}
		gen dummy = 1

		* Reshape so that each assessment item is in a different row, and save
		reshape long Count_ , i(dummy) j(question)
		drop dummy
		sort question
		save "`numobs'", replace

		* Merge percent correct and number of obs.  Both files come from the
		* same item list, so every item must match; stop if one does not.
		use "`pctcorrect'", clear
		merge 1:1 question using "`numobs'", assert(match) nogenerate

		* Prepare to list the items included in Table 6
		gen Pctcorrect = Correct_*100
		format Pctcorrect %5.1f
		* stable keeps tied items in item-number order, so the listing is the
		* same on every run
		sort Pctcorrect, stable
		gen byte intable = inlist(question, 8, 44, 14, 32, 25, 56)

		* List stats for Table 6, science assessment items, grade 9
		list question Count_ Pctcorrect if intable==1
		
		
	* SCIENCE ASSESSMENT ITEMS, GRADE 10

		* Each row is one student observed at endline, and each column is one
		* assessment item that appears on any of the exam versions.  A response
		* is coded as correct or incorrect if the item was on the version the
		* student took, and is missing otherwise.  collapse ignores missing
		* values, so the means and counts below cover only students who were
		* given the item.
		use Sci10_endline_IRT, clear
		tempfile master pctcorrect numobs
		save "`master'", replace

		* Item numbers present in this file.  Defined once so that the two
		* rename loops below cannot drift apart.
		local items 3/9 11/15 17 18 20/27 29/60

		* Proportion correct for each item (one row, one column per item)
		collapse (mean) Sci_003-Sci_060

		* Strip the zero padding (Sci_003 -> Correct_3) so reshape can use the
		* numeric suffix as the item number
		foreach num of numlist `items' {
			local padded : display %03.0f `num'
			rename Sci_`padded' Correct_`num'
		}
		gen dummy = 1

		* Reshape so that each assessment item is in a different row, and save
		reshape long Correct_ , i(dummy) j(question)
		drop dummy
		sort question
		save "`pctcorrect'", replace

		* Start with master data again and count non-missing responses per item
		use "`master'", clear
		collapse (count) Sci_003-Sci_060
		foreach num of numlist `items' {
			local padded : display %03.0f `num'
			rename Sci_`padded' Count_`num'
		}
		gen dummy = 1

		* Reshape so that each assessment item is in a different row, and save
		reshape long Count_ , i(dummy) j(question)
		drop dummy
		sort question
		save "`numobs'", replace

		* Merge percent correct and number of obs.  Both files come from the
		* same item list, so every item must match; stop if one does not.
		use "`pctcorrect'", clear
		merge 1:1 question using "`numobs'", assert(match) nogenerate

		* Prepare to list the items included in Table 6
		gen Pctcorrect = Correct_*100
		format Pctcorrect %5.1f
		* stable keeps tied items in item-number order; the reordering below is
		* positional, so the sorted order must be the same on every run
		sort Pctcorrect, stable
		gen byte intable = inlist(question, 8, 44, 14, 32, 25, 56)

		* List stats for Table 6, science assessment items, grade 10
		keep if intable==1
		* The reordering assumes all six Table 6 items are present
		assert _N == 6
		* Rows are in ascending Pctcorrect order; swap rows 2<->3 and 5<->6 so
		* the order matches the 9th grade listing
		gen order = _n
		recode order (2=3) (3=2) (5=6) (6=5)
		sort order
		list question Count_ Pctcorrect

log close